***User should have four .dta files ready:
*Core file from Waves 13-16 (wave13-16_FULL.dta)
*License file from the Topical module in Wave 13 (Module.dta)
*Data defining ban and non-ban licensing (mandatory_unique.dta)
*Wave 2 topical file containing the history of advanced courses taken (Wave2_topical.dta)


***Lines 10 through 327 are all data cleaning
***User should start with the core file and run the following code
* Begin with empty memory and set the matrix-size limit.
clear
set matsize 1500

* Results template: 9 observations (one per threshold in the loop further
* down) with two all-missing columns that the loop fills in row by row.
set obs 9
foreach col in frac value {
	generate `col' = .
}
save cutoff_graph.dta, replace

* Empty memory again so the survey data can be loaded next.
clear
*********************************** Load the Wave 13 core file and merge the modules
* Read the core file and restrict it to wave 13.
use "wave13-16_FULL.dta"
keep if swave == 13

* Bring in the license module, matching many core rows to one module row on
* lgtkey; the match status is recorded in _licensemerge.
merge m:1 lgtkey using "Module.dta", generate(_licensemerge)

* Bring in the three course variables from the Wave 2 topical file (match
* status in edumerge) and discard rows that exist only in that file.
merge m:1 lgtkey using "Wave2_topical", keepusing(ecourse1 ecourse2 ecourse3) generate(edumerge)
drop if edumerge == 2

* Course indicators: 1 when the corresponding ecourse variable equals 1,
* 0 otherwise (a missing ecourse value therefore yields 0).
generate math = (ecourse1 == 1)
generate sci  = (ecourse2 == 1)
generate eng  = (ecourse3 == 1)
********************************Data Management***********************
* Keep ages 18 through 64; observations with a missing age are removed too,
* because inrange() is false for missing values.
keep if inrange(tage, 18, 64)

* Remove observations with
*   - fewer than 1 work hour (ejbhrs1),
*   - unusable license data (iprocert below 1),
*   - imputed license information (negative iwhopcer).
drop if ejbhrs1 < 1 | iprocert < 1 | iwhopcer < 0

* Demographic indicators.
* female is 1 unless esex equals 1 (so a missing esex also counts as 1);
* every observation flagged female is then removed from the sample.
generate female = (esex != 1)
drop if female == 1

* black: erace equals 2.
generate black = (erace == 2)

* hispanic: eorigin equals 1 and not already flagged black.
generate hispanic = (eorigin == 1 & black == 0)

*Create hourly wage: tpmsum1 divided by 4 and by ejbhrs1
* (presumably monthly earnings / 4 weeks / weekly hours -- confirm against the
* SIPP codebook).
* The variable is stored as double. With the previous "long" storage type Stata
* truncated the ratio to a whole number, so e.g. 12.75 became 12 and 100.5
* became 100 (escaping the >100 filter), which also distorted lwage.
generate double wage = (tpmsum1)/4/ejbhrs1

* Trim the wage distribution to [5, 100]. Because missing values compare as
* larger than any number, the first drop also removes missing wages.
drop if wage>100
drop if wage<5

* Natural log of the hourly wage.
gen lwage =log(wage)
* union is 1 unless eunion1 equals 2.
generate union = (eunion1 != 2)

* Remove imputed union data (negative eunion1 codes).
drop if eunion1 < 0

* Flag races other than white and black (erace above 2). The flag is only
* created here; nothing is dropped in this step. A missing erace also
* satisfies "> 2" and is therefore flagged.
generate other = (erace > 2)

***Merging with Module data of license (skip)
***Create license dummy (skip)

* Remove imputed licensure source. This repeats the identical iwhopcer filter
* applied earlier in the data-management section, so it removes nothing new.
drop if iwhopcer < 0


* Government dummy: eclwrk1 between 3 and 5 inclusive.
generate govt = inrange(eclwrk1, 3, 5)

* Service-worker dummy: ejbind1 between 6870 and 9290 inclusive.
generate service = inrange(ejbind1, 6870, 9290)

* Education dummies built from eeducate:
*   39      -> hs
*   40-43   -> somecollege
*   44      -> college
*   above 44 -> postgrad (a missing eeducate also satisfies "> 44")
generate hs          = (eeducate == 39)
generate somecollege = inrange(eeducate, 40, 43)
generate college     = (eeducate == 44)
generate postgrad    = (eeducate > 44)

* Self-employment dummy: 0 when ebuscntr is negative, 1 in every other case
* (including a missing ebuscntr).
generate self_emp = !(ebuscntr < 0)

* Quadratic age term, then give the age variable a shorter name.
generate age2 = tage^2
rename tage age

* Certified workers (cert equal to 1) are not counted as licensed.
replace license = 0 if cert == 1

*** check portion of licensed

* Flag license holders whose irjpcert is anything other than 1. This must be
* computed BEFORE license is reset below, because it reads the current
* license value.
generate licensenorequire = (irjpcert != 1 & license == 1)

* Keep license switched on only where the job requires it: any irjpcert above
* 1 (missing values included) resets license to 0.
* NOTE(review): the flag above uses "!= 1" while this reset uses "> 1", so
* irjpcert values below 1 are flagged but not reset -- confirm this is intended.
replace license = 0 if irjpcert > 1

summarize license

* For each candidate threshold, re-code license at the cell level, apply the
* sample restrictions, and store the resulting mean of license in row `a' of
* cutoff_graph.dta. preserve/restore brings the working data back after every
* pass, so each threshold starts from the same individual-level license values.
local a = 1

foreach i in 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 {
	preserve

	* Mean of license within each tfipsst x tjbocc1 cell (presumably
	* state x occupation -- confirm). It is stored as double on purpose:
	* a float mean compared with the double literal `i' misclassifies
	* cells whose share equals the threshold exactly, e.g. float(0.7) and
	* float(0.9) are slightly BELOW 0.7 and 0.9.
	egen double licensecheck = mean(license), by(tfipsst tjbocc1)

	* Threshold rule: everyone in a cell is coded licensed when the cell
	* mean reaches `i', unlicensed otherwise. Missing compares as larger
	* than any number, so cells with no license data are excluded
	* explicitly instead of being coded as licensed.
	replace license = 0 if licensecheck <  `i'
	replace license = 1 if licensecheck >= `i' & !missing(licensecheck)

	* Sample restrictions applied after the cell means are computed.
	keep if apmsum1==0 & inrange(wage,5,100)

	* Race selection
	drop if other==1
	drop if hispanic==1 & black==0

	summarize license
	local m = r(mean)

	* Write this threshold and its mean into row `a' of the template.
	use cutoff_graph.dta, clear
	replace frac  = `i' if _n==`a'
	replace value = `m' if _n==`a'
	save cutoff_graph.dta, replace

	restore

	local a = `a' + 1
}

* Load the stored threshold results and rescale both columns to percent.
use cutoff_graph.dta, clear
foreach col of varlist frac value {
	replace `col' = `col' * 100
}

* Bar chart of the mean of license by threshold, with two reference lines and
* their labels, exported as PNG.
* NOTE(review): the first reference line is drawn at 12.8 while its label
* reads 12.7% -- confirm which figure is intended.
graph bar value, ///
	over(frac, descending) ///
	yline(12.8) yline(28.4) ///
	ytitle("Percent of licensed workers (%)") ///
	b1title("Threshold (%)") ///
	text(13.5 30 "SIPP: 12.7%") ///
	text(29.2 30 "Kleiner & Krueger (JoLE,2013): 28.4%") ///
	graphregion(color(white)) bgcolor(white)
graph export cutoffReverse.png, as(png) replace
